# "Registering Theory-based Predictions in Political Science"
# PS: Political Science & Politics
# Andrew Cesare Miller

# Load libraries
library(RColorBrewer)
library(ggplot2)
library(ggforce)

# Set working directory to script location.
# Inside RStudio the path comes from the source editor; when the script is
# launched with `Rscript`/`R -f` it comes from the `--file=` command-line
# argument. If neither yields an existing directory the working directory is
# left unchanged (the relative CSV paths below then resolve against it).
local({
  script_dir <- NULL
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
    script_dir <- dirname(rstudioapi::getSourceEditorContext()$path)
  } else {
    file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE),
                     value = TRUE)
    if (length(file_arg) > 0) {
      script_path <- sub("^--file=", "", file_arg[1])
      script_dir <- dirname(normalizePath(script_path, mustWork = FALSE))
    }
  }
  # An unsaved RStudio buffer has an empty path; never call setwd("").
  if (!is.null(script_dir) && nzchar(script_dir) && dir.exists(script_dir)) {
    setwd(script_dir)
  }
})

#### Figure 1: Falsifiability of Predictions in International Relations Articles ####

# Upload article prediction data from Fomin et al. (2021)
p <- read.csv("./Fomin_etal_Data.csv", stringsAsFactors = FALSE)

# Upload coding of article falsifiability
pc <- read.csv("./Article_Predictions.csv", stringsAsFactors = FALSE)

# Calculate the number of articles with falsifiable predictions:
# keep coded predictions whose timeframe is given and whose result is
# measurable, then count the distinct article titles that remain.
pm <- subset(pc,
             Timeframe != "Not Given" & Prediction.Result != "Unmeasurable")
total.falsifiable <- length(unique(pm$Title))
total.falsifiable

# Create table for plot: one row per nested category, in plotting order.
pd <- data.frame(
  category = factor(
    c("prediction", "specific", "time period", "measurable"),
    levels = c("prediction", "specific", "time period", "measurable")
  ),
  total = c(
    sum(p$is_prediction, na.rm = TRUE),
    sum(p$is_idioscopic, na.rm = TRUE),
    sum(p$explicit_period, na.rm = TRUE),
    total.falsifiable
  )
)
# NOTE(review): 817 is hard-coded and matches the "817 Articles" annotation in
# the Figure 1 plot; presumably it is the number of articles containing a
# prediction -- confirm against the data before reusing with another file.
pd$proportion <- pd$total / 817

# Plot of predictive articles
# Each category is a circle whose area equals its article count
# (area = pi * r^2, hence r = sqrt(total / pi)).
pd$r <- sqrt(pd$total / pi)
# All circles share one x-centre, and y0 = r makes every circle touch y = 0,
# so they are tangent at the same bottom point.
pd$x0 <- max(pd$r) / 2
pd$y0 <- pd$r
plot_article <- ggplot(pd, aes(x0 = x0, y0 = y0, r = r, fill = category)) +
  geom_circle() +
  theme_void() +
  scale_fill_manual(
    name = "Articles with:",
    values = c("grey90", "grey60", "grey30", "grey0"),
    labels = c("Prediction",
               "Prediction of events",
               "Prediction with time period",
               "Falsifiable prediction")
  ) +
  annotate("text", label = "817 Articles", x = 8, y = 22) +
  annotate("text", label = "201 Articles \n (25%)", x = 8, y = 10) +
  annotate("text", label = "27 Articles \n (3%)", x = 23, y = 4) +
  annotate("text", label = "12 Articles \n (1%)", x = 23, y = 0) +
  # Leader lines from the two small circles to their labels. annotate() draws
  # each line exactly once; geom_segment(aes(<constants>)) would inherit `pd`
  # and redraw the same segment once per row.
  annotate("segment", x = 10, y = 4.5, xend = 19.75, yend = 4.5) +
  annotate("segment", x = 10, y = 1, xend = 19.25, yend = 1)
print(plot_article)

#### Figure 2: Predictive Content about the Russian Invasion of Ukraine on Twitter ####

# Load twitter predictions
t <- read.csv("./Twitter_Predictions.csv", stringsAsFactors = FALSE)
# Fix the category order so bars and the summary table appear in this order.
t$Prediction.Type <- factor(
  t$Prediction.Type,
  levels = c("Prediction", "Policy Input", "Predictive Claim")
)

# Calculate proportion: tweets per category and each category's share of all
# categorised tweets.
td <- data.frame(table(t$Prediction.Type))
names(td) <- c("Category", "Number")
td$Category <- factor(
  td$Category,
  levels = c("Prediction", "Policy Input", "Predictive Claim")
)
td$Proportion <- td$Number / sum(td$Number)
td

# Create plot: bar chart of tweet counts per category of predictive content,
# with a percentage label on each bar and a call-out explaining the labels.
# The labels are fixed annotations, so they are added with annotate(), which
# draws each one once; geom_label(aes(<constants>)) would inherit `t` and
# redraw the same label once per tweet row.
plot_tweet <- ggplot(t, aes(Prediction.Type)) +
  geom_bar(stat = "count") +
  # Connector from the middle bar's label to the explanatory call-out.
  # `size` is kept for older ggplot2; versions >= 3.4.0 prefer `linewidth`.
  annotate("segment", x = 2, y = 25, xend = 2.5, yend = 35, size = .25) +
  annotate("label", x = 1, y = 38 + 5, label = "45%",
           size = 3, fill = "gray90") +
  annotate("label", x = 2, y = 20 + 5, label = "26%",
           size = 3, fill = "gray90") +
  annotate("label", x = 3, y = 23 + 5, label = "29%",
           size = 3, fill = "gray90") +
  annotate("label", x = 2.5, y = 35,
           label = "Proportion of tweets\nwithin category",
           size = 3, fill = "gray90") +
  xlab("Category of Predictive Content") +
  ylab("Number of Tweets") +
  theme_bw()
suppressWarnings(print(plot_tweet))
